library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(coefplot)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(iterators)
library(parallel)
library(doParallel)
## Loading required package: foreach
## 
## Attaching package: 'foreach'
## 
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
df <- readr::read_csv("paint_project_train_data.csv", col_names = TRUE)
## Rows: 835 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Lightness, Saturation
## dbl (6): R, G, B, Hue, response, outcome
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop the regression response column; this part models the binary outcome only.
dfiiiA <- select(df, -response)

glimpse(dfiiiA)
## Rows: 835
## Columns: 7
## $ R          <dbl> 172, 26, 172, 28, 170, 175, 90, 194, 171, 122, 0, 88, 144, …
## $ G          <dbl> 58, 88, 94, 87, 66, 89, 78, 106, 68, 151, 121, 140, 82, 163…
## $ B          <dbl> 62, 151, 58, 152, 58, 65, 136, 53, 107, 59, 88, 58, 132, 50…
## $ Lightness  <chr> "dark", "dark", "dark", "dark", "dark", "dark", "dark", "da…
## $ Saturation <chr> "bright", "bright", "bright", "bright", "bright", "bright",…
## $ Hue        <dbl> 4, 31, 8, 32, 5, 6, 34, 10, 1, 21, 24, 22, 36, 16, 26, 12, …
## $ outcome    <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,…
# Center and scale a numeric vector to zero mean and unit variance (z-score).
#
# @param x numeric vector; sd(x) must be non-zero for a finite result.
# @return numeric vector the same length as `x`.
standardize <- function(x) {
  # last expression is returned; explicit return() is only for early exits
  (x - mean(x)) / sd(x)
}


# Standardize the four continuous inputs in one pass; the categorical
# columns and the binary outcome are left unchanged.
dfiiiA_standard <- dfiiiA %>%
  mutate(across(c(R, G, B, Hue), standardize))

dfiiiA_standard %>% glimpse()
## Rows: 835
## Columns: 7
## $ R          <dbl> -0.19790120, -2.74419521, -0.19790120, -2.70931447, -0.2327…
## $ G          <dbl> -2.3619736, -1.7631189, -1.6433480, -1.7830807, -2.2022790,…
## $ B          <dbl> -1.7994266, -0.1706872, -1.8726283, -0.1523868, -1.8726283,…
## $ Lightness  <chr> "dark", "dark", "dark", "dark", "dark", "dark", "dark", "da…
## $ Saturation <chr> "bright", "bright", "bright", "bright", "bright", "bright",…
## $ Hue        <dbl> -1.3548215, 1.3198239, -0.9585777, 1.4188848, -1.2557605, -…
## $ outcome    <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,…

## Part iii: Classification - iiiA) GLM
# A1: Intercept-only model - no inputs (baseline log-odds of the event only)

modA <- glm(outcome ~ 1, data = dfiiiA_standard, family = binomial)
#modA %>% summary()

# A2: Categorical inputs only - linear additive

modB <- glm(outcome ~ Lightness + Saturation, data = dfiiiA_standard, family = binomial)

# A3: Continuous inputs only - linear additive

modC <- glm(outcome ~ R + G + B + Hue, data = dfiiiA_standard, family = binomial)

# A4: Linear additive features using all inputs (categorical and continuous)

modD <- glm(outcome ~ ., data = dfiiiA_standard, family = binomial)

# A5: Interact the categorical inputs with the main continuous inputs

modE <- glm(outcome ~ (Lightness + Saturation) * ( R + G + B + Hue), data = dfiiiA_standard, family = binomial)

# A6: Categorical main effects plus all pairwise interactions of the
# continuous inputs

modF <- glm(outcome ~ (Lightness + Saturation) + ( R + G + B + Hue)^2, data = dfiiiA_standard, family = binomial)

# A7: Interact the categorical inputs with all main effects and all
# pairwise interactions of the continuous inputs

modG <- glm(outcome ~ (Lightness + Saturation) * ( R + G + B + Hue)^2, data = dfiiiA_standard, family = binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

# A8: Non-linear basis functions (squared terms) motivated by the EDA

modH <- glm(outcome ~ (Lightness + Saturation) + ( R + G + B + Hue)^2 + I(R^2) + I(G^2) + I(B^2) + I(Hue^2), data = dfiiiA_standard, family = binomial)

# A9: Interactions of basis functions with other basis functions
# (full four-way interaction of the continuous inputs)

modI <- glm(outcome ~ Lightness + Saturation + R*G*B*Hue, data = dfiiiA_standard, family = binomial)

# A10: Interactions of the basis functions with the categorical inputs

modJ <- glm(outcome ~ (Lightness + Saturation) * R*G*B*Hue, data = dfiiiA_standard, family = binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

# A11: Did you experience any issues or warnings while fitting the
# generalized linear models?

Yes. Models G and J produced the warning "glm.fit: fitted probabilities numerically 0 or 1 occurred", which suggests complete or quasi-complete separation for some observations in these highly flexible models.

#A12:What performance metric did you use to make your selection?

modA %>% broom::glance()
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          898.     834  -449.  900.  905.     898.         834   835
modB %>% broom::glance()
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          898.     834  -357.  740.  801.     714.         822   835
modC %>% broom::glance()
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          898.     834  -434.  878.  902.     868.         830   835
modD %>% broom::glance()
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          898.     834  -348.  731.  811.     697.         818   835
modE %>% broom::glance()
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          898.     834  -289.  707. 1014.     577.         770   835
modF %>% broom::glance()
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          898.     834  -315.  677.  786.     631.         812   835
modG %>% broom::glance()
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          898.     834  -147.  581. 1257.     295.         692   835
modH %>% broom::glance()
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          898.     834  -293.  640.  768.     586.         808   835
modI %>% broom::glance()
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          898.     834  -289.  634.  767.     578.         807   835
modJ %>% broom::glance()
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          898.     834 -3172. 6760. 7743.    6344.         627   835
# Compare all ten GLMs with BIC; a named list + vapply replaces the
# repetitive one-call-per-model vector construction.
glm_fits <- list(
  modA = modA, modB = modB, modC = modC, modD = modD, modE = modE,
  modF = modF, modG = modG, modH = modH, modI = modI, modJ = modJ
)
bic_values <- vapply(glm_fits, BIC, numeric(1))

# Find the model with the lowest BIC (lower = better, complexity-penalized)
best_model_name <- names(bic_values)[which.min(bic_values)]
best_bic_value <- min(bic_values)

# Print out the best model
best_model_name
## [1] "modI"
best_bic_value
## [1] 766.8026

#A13:Visualize the coefficient summaries for your top 3 models.

# Pull point estimates and standard errors from one fitted glm, tagged
# with a model label, as a tidy data.frame.
extract_coef_table <- function(fit, label) {
  cf <- summary(fit)$coefficients
  data.frame(term = rownames(cf),
             estimate = cf[, "Estimate"],
             std_error = cf[, "Std. Error"],
             model = label)
}

# Stack the coefficient tables for the top 3 models (by BIC)
df_coef <- rbind(
  extract_coef_table(modI, "modI"),
  extract_coef_table(modH, "modH"),
  extract_coef_table(modF, "modF")
)

# Approximate 95% confidence intervals (normal approximation)
df_coef$CI_lower <- df_coef$estimate - 1.96 * df_coef$std_error
df_coef$CI_upper <- df_coef$estimate + 1.96 * df_coef$std_error

# Point estimates with intervals, flipped so term labels are readable
ggplot(df_coef, aes(x = term, y = estimate, ymin = CI_lower, ymax = CI_upper, colour = model)) +
  geom_pointrange() +
  theme_minimal() +
  labs(title = "Coefficient Estimates with 95% CIs") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  coord_flip()

It seems that certain levels of "Saturation" (like "Saturationmuted", "Saturationneutral", etc.), certain "Lightness" levels (like "Lightnessdeep", "Lightnessslight", etc.), and some of the interaction terms (like "R:G", "R:B:Hue", "R:G:Hue", etc.) have coefficients that are notably different from zero, which suggests they are important in predicting the outcome.

##Part iii: Classification – iiiB) Bayesian GLM #B1: Fit 2 Bayesian generalized linear models – one must be the best model from iiiA) and the second must be another model you fit in iiiA).

Model I is the best model from iiiA), and model H is the second best; both are refit here as Bayesian models.

#B2: You may use the Laplace Approximation approach we used in lecture and the homework assignments.

# Design matrices for the two Bayesian fits (same formulas as modI and modH)
Xmat_I <- model.matrix(outcome ~ Lightness + Saturation + R*G*B*Hue, data = dfiiiA_standard)
Xmat_H <- model.matrix(outcome ~ (Lightness + Saturation) + ( R + G + B + Hue)^2 + I(R^2) + I(G^2) + I(B^2) + I(Hue^2), data = dfiiiA_standard)

Use a prior mean of 0 and a prior standard deviation of 4.5 for all regression coefficients.

# Everything the log-posterior needs: observed 0/1 outcomes, design matrix,
# and the shared independent normal prior (mean 0, sd 4.5) on all coefficients.
info_I <- list(
  yobs = dfiiiA_standard$outcome,
  design_matrix = Xmat_I,
  mu_beta = 0,
  tau_beta = 4.5 
)

# Same prior setup for model H
info_H <- list(
  yobs = dfiiiA_standard$outcome,
  design_matrix = Xmat_H,
  mu_beta = 0,
  tau_beta = 4.5 
)
# Un-normalized log-posterior for Bayesian logistic regression with
# independent normal priors on the coefficients.
#
# @param unknowns numeric vector of regression coefficients (beta).
# @param my_info list with: yobs (0/1 outcomes), design_matrix,
#   mu_beta (prior mean), tau_beta (prior sd).
# @return scalar log-posterior (up to an additive constant).
logistic_logpost <- function(unknowns, my_info)
{
  X <- my_info$design_matrix
  eta <- X %*% unknowns
  # inverse logit via base stats; equivalent to boot::inv.logit but
  # avoids the external dependency
  mu <- stats::plogis(eta)
  # evaluate the log-likelihood (independent Bernoulli observations)
  log_lik <- sum(dbinom(x = my_info$yobs, size = 1, prob = mu, log = TRUE ))
  # evaluate the log-prior (iid normal on each coefficient)
  log_prior <- sum(dnorm(x = unknowns, mean = my_info$mu_beta, sd = my_info$tau_beta, log = TRUE ))
  # sum together
  log_lik + log_prior
}

Now define the my_laplace() function.

# Laplace approximation to a posterior: locate the mode with BFGS, then use
# the negative inverse Hessian at the mode as the posterior covariance.
# Also returns the Gaussian approximation to the log marginal likelihood
# (evidence). Code adapted from the `LearnBayes` function `laplace()`.
my_laplace <- function(start_guess, logpost_func, ...)
{
  opt_res <- optim(start_guess,
                   logpost_func,
                   gr = NULL,
                   ...,
                   method = "BFGS",
                   hessian = TRUE,
                   control = list(fnscale = -1, maxit = 5001))

  post_mode <- opt_res$par
  post_cov <- -solve(opt_res$hessian)
  n_params <- length(post_mode)

  # Laplace (multivariate normal) approximation to the log-evidence
  log_ev <- n_params / 2 * log(2 * pi) +
    0.5 * log(det(post_cov)) +
    logpost_func(post_mode, ...)

  list(mode = post_mode,
       var_matrix = post_cov,
       log_evidence = log_ev,
       converge = ifelse(opt_res$convergence == 0, "YES", "NO"),
       iter_counts = as.numeric(opt_res$counts[1]))
}

Perform the Laplace Approximation for 2 models with an initial guess of zero for all unknowns for each model.

# Laplace approximation for both models, starting from a zero vector
laplace_I <- my_laplace(rep(0,ncol(Xmat_I)), logistic_logpost, info_I )

laplace_H <- my_laplace(rep(0,ncol(Xmat_H)), logistic_logpost, info_H )

We have trained 2 Bayesian logistic regression models. Now we identify the best one using the evidence-based approach.

Calculate the posterior model weight associated with each of the 2 models. Create a histogram that shows the posterior model weight for each model.

evidence_laplace <- tibble::tibble( model = c('H', 'I'))

# Exponentiate the log-evidence, then normalize to posterior model weights
evidence_laplace <- evidence_laplace %>% mutate( evidence = exp(c(laplace_H$log_evidence, 
                      laplace_I$log_evidence)))

evidence_laplace <- evidence_laplace %>% mutate(weight = evidence/sum(evidence))

# geom_col() draws bars at pre-computed heights; the previous
# geom_histogram(stat = "identity") triggered an "ignoring unknown
# parameters" warning.
evidence_laplace %>% ggplot(aes(x = model, y = weight)) + geom_col()

Model H is the best model according to the posterior model weights.

#B3:Visualize the regression coefficient posterior summary statistics for your best model.

laplace_H %>% glimpse()
## List of 5
##  $ mode        : num [1:27] -5.493 -0.248 0.163 0.618 0.48 ...
##  $ var_matrix  : num [1:27, 1:27] 1.272 -0.38 -0.936 -0.883 -0.93 ...
##  $ log_evidence: num -371
##  $ converge    : chr "YES"
##  $ iter_counts : num 97
# Plot posterior coefficient means with +/- 2 sd intervals, one row per
# feature, keeping the features in design-matrix order.
viz_post_coefs <- function(post_means, post_sds, xnames)
{
  plot_df <- tibble::tibble(
    mu = post_means,
    sd = post_sds,
    x = xnames
  )

  plot_df %>%
    mutate(x = factor(x, levels = xnames)) %>%
    ggplot(mapping = aes(x = x)) +
    geom_hline(yintercept = 0, color = 'grey', linetype = 'dashed') +
    geom_point(mapping = aes(y = mu)) +
    geom_linerange(mapping = aes(ymin = mu - 2 * sd,
                                 ymax = mu + 2 * sd,
                                 group = x)) +
    labs(x = 'feature', y = 'coefficient value') +
    coord_flip()
}
# Posterior mean (MVN mode) and sd (covariance diagonal) per coefficient
post_mode_H <- laplace_H$mode
post_sd_H <- sqrt(diag(laplace_H$var_matrix))
xnames_H <- colnames(Xmat_H)

viz_post_coefs(post_mode_H, post_sd_H, xnames_H)

##Part iii: Classification – iiiC) GLM Predictions #C1:visualize the trends on a specifically designed prediction grid, defines a grid using the expand.grid() function

# Prediction grid: vary R (6 values) and G (50 values), hold B and Hue at
# their standardized means (0), crossed with every Lightness/Saturation level.
viz_grid <- expand.grid(R = seq(-3, 3, length.out=6),
                        G = seq(-2.5, 2.5, length.out=50),
                        B = 0,
                        Hue = 0,
                        Lightness = unique(dfiiiA_standard$Lightness),
                        Saturation = unique(dfiiiA_standard$Saturation),
                        KEEP.OUT.ATTRS = FALSE, 
                        stringsAsFactors = FALSE) %>% 
  as.data.frame() %>% tibble::as_tibble()

viz_grid %>% glimpse()
## Rows: 14,700
## Columns: 6
## $ R          <dbl> -3.0, -1.8, -0.6, 0.6, 1.8, 3.0, -3.0, -1.8, -0.6, 0.6, 1.8…
## $ G          <dbl> -2.500000, -2.500000, -2.500000, -2.500000, -2.500000, -2.5…
## $ B          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Hue        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Lightness  <chr> "dark", "dark", "dark", "dark", "dark", "dark", "dark", "da…
## $ Saturation <chr> "bright", "bright", "bright", "bright", "bright", "bright",…

Generate random posterior samples of the unknown parameters from the Laplace Approximation assumed Multivariate Normal (MVN) distribution

# Draw coefficient samples from the MVN Laplace approximation of the
# posterior; returns a tibble with columns beta_00, beta_01, ...
generate_glm_post_samples <- function(mvn_result, num_samples)
{
  num_beta <- length(mvn_result$mode)
  draws <- MASS::mvrnorm(n = num_samples,
                         mu = mvn_result$mode,
                         Sigma = mvn_result$var_matrix)
  draws %>%
    as.data.frame() %>%
    tibble::as_tibble() %>%
    purrr::set_names(sprintf("beta_%02d", seq_len(num_beta) - 1))
}
# Posterior predictive samples for logistic regression.
#
# @param Xnew design matrix (rows = prediction points).
# @param Bmat matrix of posterior coefficient samples (rows = samples).
# @return list with eta_mat (linear predictor) and mu_mat (event
#   probability), both rows = prediction points, cols = posterior samples.
post_logistic_pred_samples <- function(Xnew, Bmat)
{
  eta_mat <- Xnew %*% t(Bmat)
  # inverse logit via base stats (equivalent to boot::inv.logit)
  mu_mat <- stats::plogis(eta_mat)
  list(eta_mat = eta_mat, mu_mat = mu_mat)
}
# Posterior predictive summary: draw coefficient samples from the Laplace
# MVN approximation, push them through the test design matrix, and
# summarize the event probability for every prediction row.
summarize_logistic_pred_from_laplace <- function(mvn_result, Xtest, num_samples)
{
  beta_draws <- as.matrix(generate_glm_post_samples(mvn_result, num_samples))

  # posterior predictive samples on the test set
  pred_samples <- post_logistic_pred_samples(Xtest, beta_draws)
  prob_mat <- pred_samples$mu_mat

  # per-row posterior mean and 5th/95th percentiles of the event probability
  tibble::tibble(
    mu_avg = rowMeans(prob_mat),
    mu_q05 = apply(prob_mat, 1, stats::quantile, probs = 0.05),
    mu_q95 = apply(prob_mat, 1, stats::quantile, probs = 0.95)
  ) %>%
    tibble::rowid_to_column("pred_id")
}
# Design matrices for the prediction grid (no outcome column, so no LHS)
Xviz_H <- model.matrix( ~ (Lightness + Saturation) + ( R + G + B + Hue)^2 + I(R^2) + I(G^2) + I(B^2) + I(Hue^2), viz_grid)

Xviz_I <- model.matrix( ~  Lightness + Saturation + R*G*B*Hue, viz_grid)

Xviz_H %>% dim()
## [1] 14700    27
# 2500 posterior draws per model for the predictive summaries
set.seed(1234) 

post_pred_summary_H <- summarize_logistic_pred_from_laplace( laplace_H, Xviz_H, 2500)

post_pred_summary_I <- summarize_logistic_pred_from_laplace( laplace_I, Xviz_I, 2500)
# Plot posterior predicted event probability vs G: ribbons = 90% uncertainty
# interval, line = posterior mean, colored by Saturation, faceted by R.
viz_bayes_logpost_preds <- function(post_pred_summary, input_df)
{
  post_pred_summary %>% 
    left_join(input_df %>% tibble::rowid_to_column('pred_id'),
              by = 'pred_id') %>% 
    ggplot(mapping = aes(x = G)) +
    geom_ribbon(mapping = aes(ymin = mu_q05,
                              ymax = mu_q95,
                              group = interaction(Saturation, R),
                              fill = Saturation),
                alpha = 0.25) +
    # `size` for lines was deprecated in ggplot2 3.4.0; `linewidth` is the
    # replacement and silences the deprecation warning
    geom_line(mapping = aes(y = mu_avg,
                            group = interaction(Saturation, R),
                            color = Saturation),
              linewidth = 0.75) +
    facet_wrap( ~ R, labeller = 'label_both') +
    labs(y = "event probability") 
}

#Model H

viz_bayes_logpost_preds(post_pred_summary_H, viz_grid)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#Model I

viz_bayes_logpost_preds(post_pred_summary_I, viz_grid)

The predictive trends of the two selected Bayesian generalized linear models are not consistent with each other.

##Part iii: Classification – iiiD) Train/tune with resampling

# Re-express the outcome as a two-level factor for caret, with 'event'
# first so caret treats it as the positive class.
dfiiiD <- df %>% 
  select(-response) %>% 
  mutate(outcome = ifelse(outcome == 1, 'event', 'non_event'),
         outcome = factor(outcome, levels = c('event', 'non_event')))

dfiiiD %>% glimpse()
## Rows: 835
## Columns: 7
## $ R          <dbl> 172, 26, 172, 28, 170, 175, 90, 194, 171, 122, 0, 88, 144, …
## $ G          <dbl> 58, 88, 94, 87, 66, 89, 78, 106, 68, 151, 121, 140, 82, 163…
## $ B          <dbl> 62, 151, 58, 152, 58, 65, 136, 53, 107, 59, 88, 58, 132, 50…
## $ Lightness  <chr> "dark", "dark", "dark", "dark", "dark", "dark", "dark", "da…
## $ Saturation <chr> "bright", "bright", "bright", "bright", "bright", "bright",…
## $ Hue        <dbl> 4, 31, 8, 32, 5, 6, 34, 10, 1, 21, 24, 22, 36, 16, 26, 12, …
## $ outcome    <fct> event, event, event, non_event, non_event, non_event, non_e…
dfiiiD %>% pull(outcome) %>% levels()
## [1] "event"     "non_event"
# 10-fold cross-validation repeated 3 times; Accuracy is the selection metric.
# NOTE(review): a ROC AUC comparison would additionally need
# classProbs = TRUE and summaryFunction = twoClassSummary here - confirm
# whether AUC-based selection is required.
my_ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
my_metric <- 'Accuracy'

# Generalized linear models
# GLM 1: all categorical and continuous inputs - linear additive features

set.seed(1234)
glm_A <- train(outcome ~ ., 
              data = dfiiiD,
              method = "glm",
              family = binomial,
              metric = my_metric,
              preProcess = c("center", "scale"),
              trControl = my_ctrl)

glm_A %>% coefplot()

# GLM 2: categorical inputs plus all main effects and all pairwise
# interactions of the continuous inputs

set.seed(1234)
glm_B <- train(outcome ~ (Lightness + Saturation) + ( R + G + B + Hue)^2, 
              data = dfiiiD,
              method = "glm",
              family = binomial,
              metric = my_metric,
              preProcess = c("center", "scale"),
              trControl = my_ctrl)

# GLM 3 and 4: the two models selected from iiiA) (modH and modI formulas)

set.seed(1234)
glm_C <- train(outcome ~ (Lightness + Saturation) + ( R + G + B + Hue)^2 + I(R^2) + I(G^2) + I(B^2) + I(Hue^2), 
              data = dfiiiD,
              method = "glm",
              family = binomial,
              metric = my_metric,
              preProcess = c("center", "scale"),
              trControl = my_ctrl)
set.seed(1234)
glm_D <- train(outcome ~ Lightness + Saturation + R*G*B*Hue, 
              data = dfiiiD,
              method = "glm",
              family = binomial,
              metric = my_metric,
              preProcess = c("center", "scale"),
              trControl = my_ctrl)

Regularized regression with Elastic net

set.seed(1234)
# Default caret grid first, to learn a sensible lambda range for this formula
enet_B_default <- train(outcome ~ (Lightness + Saturation) + ( R + G + B + Hue)^2,
                      data = dfiiiD,
                      method = 'glmnet',
                      metric = my_metric,
                      preProcess = c('center', 'scale'),
                      trControl = my_ctrl  )
# Refined grid: 10 alpha values crossed with 25 log-spaced lambda values
# spanning the lambda range explored by the default fit.
# (The unused `default_grid_B` copy of the lambda column was removed.)
alpha_seq <- seq(0.1, 1.0, by = .1)
lambda_seq <- exp(seq(log(min(enet_B_default$results$lambda)),
                          log(max(enet_B_default$results$lambda)),
                          length.out = 25))

enet_grid_B <- expand.grid(alpha = alpha_seq, lambda = lambda_seq)

enet_grid_B %>% dim()
## [1] 250   2
set.seed(1234)

# Tune elastic net over the refined alpha/lambda grid
enet_tune_B <- train(outcome ~ (Lightness + Saturation) + ( R + G + B + Hue)^2,
                      data = dfiiiD,
                      method = 'glmnet',
                      metric = my_metric,
                      preProcess = c('center', 'scale'),
                      trControl = my_ctrl,
                      tuneGrid = enet_grid_B )

plot(enet_tune_B, xTrans = log)

enet_tune_B$bestTune
##     alpha    lambda
## 166   0.7 0.0062658
set.seed(1234)
enet_C_default <- train(outcome ~ (Lightness + Saturation) + ( R + G + B + Hue)^2 + I(R^2) + I(G^2) + I(B^2) + I(Hue^2),
                      data = dfiiiD,
                      method = 'glmnet',
                      metric = my_metric,
                      preProcess = c('center', 'scale'),
                      trControl = my_ctrl  )
# Same alpha/lambda refinement strategy for the model-C formula.
# (The unused `default_grid_C` copy of the lambda column was removed.)
alpha_seq <- seq(0.1, 1.0, by = .1)
lambda_seq <- exp(seq(log(min(enet_C_default$results$lambda)),
                          log(max(enet_C_default$results$lambda)),
                          length.out = 25))

enet_grid_C <- expand.grid(alpha = alpha_seq, lambda = lambda_seq)
set.seed(1234)

# Tune elastic net over the refined grid for the model-C formula
enet_tune_C <- train(outcome ~ (Lightness + Saturation) + ( R + G + B + Hue)^2 + I(R^2) + I(G^2) + I(B^2) + I(Hue^2),
                      data = dfiiiD,
                      method = 'glmnet',
                      metric = my_metric,
                      preProcess = c('center', 'scale'),
                      trControl = my_ctrl,
                      tuneGrid = enet_grid_C )

plot(enet_tune_C, xTrans = log)

enet_tune_C$bestTune
##     alpha       lambda
## 176   0.8 0.0003523518
set.seed(1234)
enet_D_default <- train(outcome ~ Lightness + Saturation + R*G*B*Hue,
                      data = dfiiiD,
                      method = 'glmnet',
                      metric = my_metric,
                      preProcess = c('center', 'scale'),
                      trControl = my_ctrl  )
# Same alpha/lambda refinement strategy for the model-D formula.
# (The unused `default_grid_D` copy of the lambda column was removed.)
alpha_seq <- seq(0.1, 1.0, by = .1)
lambda_seq <- exp(seq(log(min(enet_D_default$results$lambda)),
                          log(max(enet_D_default$results$lambda)),
                          length.out = 25))

enet_grid_D <- expand.grid(alpha = alpha_seq, lambda = lambda_seq)
set.seed(1234)

# Tune elastic net over the refined grid for the model-D formula
enet_tune_D <- train(outcome ~ Lightness + Saturation + R*G*B*Hue,
                      data = dfiiiD,
                      method = 'glmnet',
                      metric = my_metric,
                      preProcess = c('center', 'scale'),
                      trControl = my_ctrl,
                      tuneGrid = enet_grid_D )

plot(enet_tune_D, xTrans = log)

enet_tune_D$bestTune
##     alpha    lambda
## 166   0.7 0.0062658

#Neural network

# Neural network (nnet) with caret's default tuning grid; resampling in parallel
registerDoParallel(cores=8)

set.seed(1234)
nnet_default <- caret::train( outcome ~ .,
                              data = dfiiiD,
                              method = 'nnet',
                              metric = my_metric,
                              preProcess = c('center', 'scale'),
                              trControl = my_ctrl,
                              trace = FALSE)
nnet_default$bestTune
##   size decay
## 9    5   0.1
nnet_default
## Neural Network 
## 
## 835 samples
##   6 predictor
##   2 classes: 'event', 'non_event' 
## 
## Pre-processing: centered (16), scaled (16) 
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 751, 751, 751, 752, 752, 752, ... 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   1     0e+00  0.7901132  0.2249638
##   1     1e-04  0.7857149  0.1773288
##   1     1e-01  0.8176093  0.3750096
##   3     0e+00  0.7968932  0.4011665
##   3     1e-04  0.8092708  0.4306351
##   3     1e-01  0.8256274  0.4625307
##   5     0e+00  0.8015878  0.4143694
##   5     1e-04  0.7996609  0.3992548
##   5     1e-01  0.8315707  0.4964037
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.1.
# Refined grid: larger hidden-layer sizes and a log-spaced decay sweep
nnet_grid <- expand.grid( size = c(5,9,13,17,21), 
                          decay = exp(seq(-6, 0, length.out = 11)))
registerDoParallel(cores=8)

set.seed(1234)

nnet_tune <- caret::train( outcome ~ .,
                              data = dfiiiD,
                              method = 'nnet',
                              metric = my_metric,
                              preProcess = c('center', 'scale'),
                              trControl = my_ctrl,
                              tuneGrid = nnet_grid,
                              trace = FALSE)
nnet_tune %>% plot(xTrans = log)

nnet_tune$bestTune
##    size     decay
## 41   17 0.1652989

#Random Forest

# Random forest: default mtry search, then a fine sweep over mtry = 2..30
registerDoParallel(cores=8)

set.seed(1234)

rf_default <- caret::train( outcome ~ .,
                            data = dfiiiD,
                            method = "rf",
                            trControl = my_ctrl,
                            metric = my_metric,
                            importance = TRUE)
rf_default$results
##   mtry  Accuracy     Kappa AccuracySD   KappaSD
## 1    2 0.8283860 0.3842260 0.03057507 0.1234320
## 2    9 0.8570302 0.5634829 0.03816794 0.1176811
## 3   16 0.8558347 0.5670017 0.03984956 0.1171684
rf_grid <- expand.grid(.mtry = (2:30))
registerDoParallel(cores=8)

set.seed(1234)

rf_tune <- caret::train( outcome ~ .,
                            data = dfiiiD,
                            method = "rf",
                            tuneGrid = rf_grid,
                            trControl = my_ctrl,
                            metric = my_metric,
                            importance = TRUE)
rf_tune$bestTune
##    mtry
## 14   15

#Gradient boosted tree

# Gradient boosted trees (xgboost): caret default grid first
registerDoParallel(cores=8)

set.seed(1234)

xgb_default <- caret::train(outcome ~ .,
                            data = dfiiiD,
                            method = "xgbTree",
                            trControl = my_ctrl,
                            metric = my_metric,
                            verbosity = 0,
                            nthread = 1  )

xgb_default %>% plot()

Refine the tuning grid around the default best tune to improve the model.

# Refined grid centered on the default best tune: sweep nrounds, depth, and
# eta while holding the remaining hyperparameters at their best values
xgb_grid <- expand.grid(nrounds = seq(20, 200, by = 20),
                        max_depth = c(3, 6, 9, 12),
                        eta = c(0.5, 1, 1.5) * xgb_default$bestTune$eta,
                        gamma = xgb_default$bestTune$gamma,
                        colsample_bytree = xgb_default$bestTune$colsample_bytree,
                        min_child_weight = xgb_default$bestTune$min_child_weight,
                        subsample = xgb_default$bestTune$subsample)
registerDoParallel(cores=8)
set.seed(1234)

xgb_tune <- caret::train(outcome ~ .,
                            data = dfiiiD,
                            method = "xgbTree",
                            trControl = my_ctrl,
                            metric = my_metric,
                            tuneGrid = xgb_grid,
                            verbosity = 0,
                            nthread = 1  )
xgb_tune %>% plot()

xgb_tune$bestTune
##   nrounds max_depth eta gamma colsample_bytree min_child_weight subsample
## 5     100         3 0.2     0              0.6                1         1

#SVM

# Support vector machine with a radial kernel, caret default cost/sigma grid
registerDoParallel(cores=8)

set.seed(1234)

svm_default <- caret::train( outcome ~ .,
                        data = dfiiiD,
                        method = "svmRadial",
                        preProcess = c('center', 'scale'),
                        trControl = my_ctrl,
                        metric = my_metric)
svm_default %>% plot()

# Tuning grid for svmPoly (degree, scale, C). NOTE(review): the default fit
# above used svmRadial, so this tuned model uses a different (polynomial)
# kernel - confirm that comparing the two is intended.
svm_grid <- expand.grid(C = 10^(1:2), scale = 10^(-6:0), degree = c(2, 3, 4))

set.seed(1234)
registerDoParallel(cores=8)


svm_tune <- train(outcome ~ .,
                      data = dfiiiD,
                      method = "svmPoly",
                      metric = my_metric,
                      tuneGrid = svm_grid,
                      trControl = my_ctrl )


svm_tune$bestTune
##    degree scale  C
## 19      2     1 10
svm_tune %>% plot()

#partial least squares(pls) model

# Partial least squares: default grid, then sweep ncomp = 1..10
registerDoParallel(cores=8)

set.seed(1234)

pls_default <- caret::train( outcome ~ .,
                        data = dfiiiD,
                        method = "pls",
                        preProcess = c('center', 'scale'),
                        trControl = my_ctrl,
                        metric = my_metric)
pls_default$results
##   ncomp  Accuracy     Kappa AccuracySD   KappaSD
## 1     1 0.8224046 0.3702471 0.03385942 0.1307737
## 2     2 0.8228110 0.3705918 0.03460558 0.1320145
## 3     3 0.8212046 0.3666353 0.03296195 0.1271738
registerDoParallel(cores=8)
set.seed(1234)
ncomp_values <- 1:10
pls_grid <- expand.grid(ncomp = ncomp_values)

pls_tune <- caret::train( outcome ~ .,
                        data = dfiiiD,
                        method = "pls",
                        preProcess = c('center', 'scale'),
                        trControl = my_ctrl,
                        tuneGrid = pls_grid,
                        metric = my_metric)

pls_tune$bestTune
##   ncomp
## 2     2
pls_tune$results
##    ncomp  Accuracy     Kappa AccuracySD   KappaSD
## 1      1 0.8224046 0.3702471 0.03385942 0.1307737
## 2      2 0.8228110 0.3705918 0.03460558 0.1320145
## 3      3 0.8212046 0.3666353 0.03296195 0.1271738
## 4      4 0.8211998 0.3673617 0.03298005 0.1280230
## 5      5 0.8216062 0.3675613 0.03316230 0.1279947
## 6      6 0.8220078 0.3685276 0.03350617 0.1290584
## 7      7 0.8220078 0.3685276 0.03350617 0.1290584
## 8      8 0.8220078 0.3685276 0.03350617 0.1290584
## 9      9 0.8220078 0.3685276 0.03350617 0.1290584
## 10    10 0.8220078 0.3685276 0.03350617 0.1290584

#Which model is the best if you are interested in maximizing Accuracy compared to maximizing the Area Under the ROC Curve (ROC AUC)?

# Combine the resampling results from every tuned model so their held-out
# Accuracy distributions are directly comparable (same CV folds via the
# shared seed). NOTE(review): only Accuracy/Kappa are available here; an
# ROC AUC comparison would require refitting with classProbs = TRUE and
# summaryFunction = twoClassSummary in trainControl.
caret_acc_compare <- resamples(list(
    glm_A = glm_A,
    glm_B = glm_B,
    glm_C = glm_C,
    glm_D = glm_D,
    enet_B = enet_tune_B,
    enet_C = enet_tune_C,
    enet_D = enet_tune_D,
    nnet_default = nnet_default,
    nnet_tune = nnet_tune,
    rf_default = rf_default,
    rf_tune = rf_tune,
    xgb_default = xgb_default,
    xgb_tune = xgb_tune,
    svm_default = svm_default,
    svm_tune = svm_tune,
    pls_tune = pls_tune
))

# Summary of models' performance based on Accuracy
model_summaries <- summary(caret_acc_compare)
accuracy_summary <- model_summaries$statistics$Accuracy

print(accuracy_summary)
##                   Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## glm_A        0.7710843 0.7951807 0.8192771 0.8188045 0.8452381 0.8795181    0
## glm_B        0.7500000 0.7857143 0.7988095 0.8092329 0.8288511 0.9036145    0
## glm_C        0.7619048 0.8078026 0.8192771 0.8288020 0.8447719 0.9156627    0
## glm_D        0.7469880 0.8072289 0.8235294 0.8275401 0.8614458 0.8928571    0
## enet_B       0.7619048 0.7951807 0.8192771 0.8240015 0.8528758 0.8915663    0
## enet_C       0.7619048 0.8095238 0.8214286 0.8295955 0.8524096 0.9036145    0
## enet_D       0.7619048 0.7951807 0.8192771 0.8240015 0.8528758 0.8915663    0
## nnet_default 0.7261905 0.8102410 0.8323293 0.8315707 0.8554217 0.9036145    0
## nnet_tune    0.7380952 0.8119621 0.8383534 0.8391580 0.8674699 0.9156627    0
## rf_default   0.7710843 0.8313253 0.8521008 0.8570302 0.8809524 0.9166667    0
## rf_tune      0.7831325 0.8433735 0.8571429 0.8622366 0.8809524 0.9285714    0
## xgb_default  0.8192771 0.8358434 0.8682587 0.8646654 0.8915663 0.9277108    0
## xgb_tune     0.8192771 0.8438396 0.8571429 0.8638812 0.8889128 0.9397590    0
## svm_default  0.7619048 0.7951807 0.8192771 0.8228110 0.8452381 0.8915663    0
## svm_tune     0.7619048 0.8072289 0.8313253 0.8315755 0.8623671 0.9156627    0
## pls_tune     0.7619048 0.7951807 0.8192771 0.8228110 0.8452381 0.8915663    0
# Visualization of Accuracy Across Models
dotplot(caret_acc_compare, metric = "Accuracy")

It appears that the model labeled "xgb_default" has the highest mean Accuracy, as its dot is furthest to the right on the scale. It also has relatively narrow intervals compared to other high-accuracy models, which suggests the model is not only accurate but also consistent across resamples.